Time Until Large Events#
import pandas as pd
import numpy as np
import datetime as dt

# ETAS simulation catalog (pre-formatted CSV).
csv_file = "../datasets/Formatted_ETAS_Output.csv"
etas = pd.read_csv(csv_file, sep=',', lineterminator='\n')

# USGS observed catalog, 1960-2023. `time` is kept as a string so it can be
# parsed explicitly later. low_memory=False reads the file in one pass,
# avoiding the chunked type inference that produced the mixed-type
# DtypeWarning on columns 1-18 (they are coerced to numeric downstream).
csv_file = "../datasets/All (1960-2023).csv"
usgs = pd.read_csv(csv_file, sep=',', lineterminator='\n',
                   dtype={'time': str}, low_memory=False)
Show code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_18808\2239509985.py:4: DtypeWarning: Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.
usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})
Data Filtering#
The date columns are converted to datetime, and rows are kept only when all of the following hold:
Date is after 1960-01-01 and before 2023-01-01
Longitude is between -123 and -113
Latitude is between 29 and 39
Show code cell source
# Parse ETAS dates, which use two-digit years (%y). pandas maps %y into
# 1969-2068, so any parsed year beyond the current one is really from the
# previous century and gets pushed back 100 years.
etas["Date"] = pd.to_datetime(etas["Date"], errors="coerce", format="%m/%d/%y")
future_years = etas["Date"].dt.year > pd.Timestamp.now().year
etas.loc[future_years, "Date"] -= pd.DateOffset(years=100)

# Keep events strictly between 1960-01-01 and 2023-01-01.
in_window = (etas["Date"] > pd.to_datetime("1960-01-01")) & (
    etas["Date"] < pd.to_datetime("2023-01-01")
)
etas = etas[in_window]

# Southern-California bounding box: X (longitude) in (-123, -113),
# Y (latitude) in (29, 39) — all bounds exclusive, as in the original filters.
in_box = (
    (etas["X"] > -123)
    & (etas["X"] < -113)
    & (etas["Y"] < 39)
    & (etas["Y"] > 29)
)
etas = etas[in_box]
etas.head()
| Date | Time | Year | X | Y | Magnitude | Z\r | |
|---|---|---|---|---|---|---|---|
| 1 | 1960-01-02 | 0:08:49.00 | 1960.006125 | -115.6222 | 33.0793 | 4.25 | 7.9322 |
| 2 | 1960-01-02 | 0:10:31.00 | 1960.007305 | -115.6323 | 33.1220 | 3.03 | 8.4015 |
| 3 | 1960-01-02 | 0:10:32.00 | 1960.007320 | -115.5851 | 33.0745 | 3.03 | 7.9678 |
| 4 | 1960-01-02 | 0:11:07.00 | 1960.007720 | -115.6256 | 33.0290 | 3.08 | 7.9737 |
| 5 | 1960-01-02 | 0:11:17.00 | 1960.007840 | -115.6050 | 33.0276 | 3.61 | 7.9322 |
Show code cell source
# Normalize USGS timestamps to a date-only datetime column. The strftime
# round-trip drops the time-of-day (and any timezone), and re-parsing the
# "%Y-%m-%d" strings yields naive midnight timestamps — the same result the
# original per-row `for` loop produced, but in two vectorized calls instead
# of one pd.to_datetime call per row.
usgs["Date"] = pd.to_datetime(
    pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")
)
usgs.drop(columns=["time"], inplace=True)

# Keep events strictly between 1960-01-01 and 2023-01-01.
usgs = usgs[
    (usgs["Date"] > pd.to_datetime("1960-01-01"))
    & (usgs["Date"] < pd.to_datetime("2023-01-01"))
]

# Coordinates and magnitude arrive as mixed-type strings; coerce to numeric
# (unparseable entries become NaN and fall out of the range filters below).
usgs['longitude'] = pd.to_numeric(usgs['longitude'], errors='coerce')
usgs['latitude'] = pd.to_numeric(usgs['latitude'], errors='coerce')
usgs['mag'] = pd.to_numeric(usgs['mag'], errors='coerce')

# Southern-California bounding box: longitude in (-123, -113),
# latitude in (29, 39) — all bounds exclusive.
usgs = usgs[
    (usgs['longitude'] > -123)
    & (usgs['longitude'] < -113)
    & (usgs['latitude'] < 39)
    & (usgs['latitude'] > 29)
]
usgs.head()
| latitude | longitude | depth | mag | magType | nst | gap | dmin | rms | net | ... | place | type | horizontalError | depthError | magError | magNst | status | locationSource | magSource\r | Date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 240 | 33.397500 | -116.393333 | 3.88 | 4.14 | mw | 132 | 16 | 0.07391 | 0.19 | ci | ... | 16 km N of Borrego Springs, CA | earthquake | 0.1 | 0.38 | NaN | 6 | reviewed | ci | ci\r | 2022-12-31 |
| 241 | 34.355667 | -116.921833 | 4.73 | 3.47 | mw | 121 | 25 | 0.07845 | 0.15 | ci | ... | 11km SSE of Lucerne Valley, CA | earthquake | 0.09 | 0.41 | NaN | 4 | reviewed | ci | ci\r | 2022-12-31 |
| 246 | 37.620167 | -122.025000 | 3.82 | 3.34 | mw | 141 | 16 | NaN | 0.16 | nc | ... | 3km N of Union City, CA | earthquake | 0.1 | 0.17 | NaN | 3 | reviewed | nc | nc\r | 2022-12-22 |
| 262 | 37.918167 | -122.304000 | 5.48 | 3.57 | mw | 170 | 19 | 0.01598 | 0.15 | nc | ... | 1km ENE of El Cerrito, CA | earthquake | 0.1 | 0.17 | NaN | 4 | reviewed | nc | nc\r | 2022-12-17 |
| 263 | 36.604667 | -121.209333 | 8.88 | 3.28 | ml | 67 | 55 | 0.03812 | 0.09 | nc | ... | 10km NW of Pinnacles, CA | earthquake | 0.14 | 0.28 | 0.129 | 72 | reviewed | nc | nc\r | 2022-12-13 |
5 rows × 22 columns
Data Grouping And Merging#
The data is grouped into one-day chunks, keeping the maximum magnitude recorded on each day.
# Daily maximum ETAS magnitude: one row per calendar day (daily period index
# flattened back into a "Date" column).
max_mag_etas = (
    etas.groupby(etas["Date"].dt.to_period("D"))["Magnitude"]
    .max()
    .reset_index()
)
max_mag_etas.head()
| Date | Magnitude | |
|---|---|---|
| 0 | 1960-01-02 | 4.25 |
| 1 | 1960-01-03 | 3.90 |
| 2 | 1960-01-04 | 4.24 |
| 3 | 1960-01-05 | 3.40 |
| 4 | 1960-01-06 | 3.47 |
# Daily maximum USGS magnitude: one row per calendar day (daily period index
# flattened back into a "Date" column).
max_mag_usgs = (
    usgs.groupby(usgs["Date"].dt.to_period("D"))["mag"]
    .max()
    .reset_index()
)
max_mag_usgs.head()
| Date | mag | |
|---|---|---|
| 0 | 1960-01-02 | 4.04 |
| 1 | 1960-01-05 | 3.03 |
| 2 | 1960-01-07 | 3.64 |
| 3 | 1960-01-08 | 3.10 |
| 4 | 1960-01-11 | 3.79 |
large_earthquake = 6
Large Events#
A binary label marking large events is added, along with the time (in days) since the previous large event.
large_mag_etas = max_mag_etas.copy()

# Binary label: 1 on days whose max magnitude exceeds the threshold.
large_mag_etas["Large Event"] = (large_mag_etas["Magnitude"] > large_earthquake).astype(int)
# Daily periods -> timestamps so date arithmetic (diff) works.
large_mag_etas["Date"] = large_mag_etas["Date"].dt.to_timestamp()

# Days elapsed since the previous large event; NaN on non-event days
# (only large-event rows receive a diff value).
large_mag_etas['time_diff'] = (
    large_mag_etas.loc[large_mag_etas['Large Event'] == 1, 'Date'].diff().dt.days
)
# The first row has no predecessor. Assign via a single .loc call; the
# original chained `['time_diff'].iloc[0] = pd.NA` wrote through an
# intermediate Series and raised SettingWithCopyWarning.
large_mag_etas.loc[large_mag_etas.index[0], 'time_diff'] = pd.NA
Show code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_18808\609931718.py:5: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
large_mag_etas['time_diff'].iloc[0] = pd.NA
large_mag_etas.head()
| Date | Magnitude | Large Event | time_diff | |
|---|---|---|---|---|
| 0 | 1960-01-02 | 4.25 | 0 | NaN |
| 1 | 1960-01-03 | 3.90 | 0 | NaN |
| 2 | 1960-01-04 | 4.24 | 0 | NaN |
| 3 | 1960-01-05 | 3.40 | 0 | NaN |
| 4 | 1960-01-06 | 3.47 | 0 | NaN |
large_mag_usgs = max_mag_usgs.copy()

# Binary label: 1 on days whose max magnitude exceeds the threshold.
large_mag_usgs["Large Event"] = (large_mag_usgs["mag"] > large_earthquake).astype(int)
# Daily periods -> timestamps so date arithmetic (diff) works.
large_mag_usgs["Date"] = large_mag_usgs["Date"].dt.to_timestamp()

# Days elapsed since the previous large event; NaN on non-event days
# (only large-event rows receive a diff value).
large_mag_usgs['time_diff'] = (
    large_mag_usgs.loc[large_mag_usgs['Large Event'] == 1, 'Date'].diff().dt.days
)
# The first row has no predecessor. Assign via a single .loc call; the
# original chained `['time_diff'].iloc[0] = pd.NA` wrote through an
# intermediate Series and raised SettingWithCopyWarning.
large_mag_usgs.loc[large_mag_usgs.index[0], 'time_diff'] = pd.NA
Show code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_18808\50271692.py:5: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
large_mag_usgs['time_diff'].iloc[0] = pd.NA
large_mag_usgs.head()
| Date | mag | Large Event | time_diff | |
|---|---|---|---|---|
| 0 | 1960-01-02 | 4.04 | 0 | NaN |
| 1 | 1960-01-05 | 3.03 | 0 | NaN |
| 2 | 1960-01-07 | 3.64 | 0 | NaN |
| 3 | 1960-01-08 | 3.10 | 0 | NaN |
| 4 | 1960-01-11 | 3.79 | 0 | NaN |
Graphing Time Until Large Events#
import plotly.express as px
import plotly.graph_objects as go
Show code cell source
# Bar chart: for each ETAS large event, the number of days since the
# previous large event.
fig = go.Figure()
fig.add_trace(
    go.Bar(x=large_mag_etas['Date'], y=large_mag_etas['time_diff'])
)

# Outline each bar in black so thin bars stay visible.
fig.update_traces(marker_line_color='black', marker_line_width=1)

fig.update_layout(
    title='Time Difference Bar Chart (ETAS)',
    xaxis_title='Date',
    yaxis_title='Time Difference (Days)',
)
fig.show()
Show code cell source
# Bar chart: for each USGS large event, the number of days since the
# previous large event.
fig = go.Figure()
fig.add_trace(
    go.Bar(x=large_mag_usgs['Date'], y=large_mag_usgs['time_diff'])
)

# Red bars with a black outline so thin bars stay visible.
fig.update_traces(marker_color='red', marker_line_color='black', marker_line_width=1)

fig.update_layout(
    title='Time Difference Bar Chart (USGS)',
    xaxis_title='Date',
    yaxis_title='Time Difference (Days)',
)
fig.show()